Packages

#install.packages("haven")
library(haven)
library(readxl)
library(ggplot2)
#install.packages("moments")
library(moments)
#install.packages("e1071") # this package will be used to calculate excess kurtosis which is what SPSS calculates 
library(e1071)
#install.packages("plotly") #for fun interactive plots!
library(plotly)

Read in data

# Load the cuteness data set from the Excel workbook.
# The path is absolute and specific to one machine; adjust it before running
# elsewhere.
CutenessData <- readxl::read_excel(
  path = "/users/uyenchu/Downloads/YEAR 4/Regression/Cutenessdata.xlsx"
)

Assumption Checking

Normality of Outcome Variable

Histogram

# Histogram of the outcome variable (cuteness), recoloured for easier viewing,
# with a dashed vertical line at the mean to help judge symmetry.
# bins = 30 spells out ggplot2's default bin count, so the plot is unchanged
# but the "pick better value with `binwidth`" message is no longer emitted.
hist1 <- ggplot(CutenessData, aes(x = cuteness)) +
  geom_histogram(bins = 30, fill = "purple", color = "black") +
  geom_vline(
    aes(xintercept = mean(cuteness)),
    color = "#000000",
    linetype = "dashed"
  )
hist1 # display the plot; assigning it alone draws nothing

Skewness and Kurtosis

# Shape statistics for the outcome variable.
# Both `moments` and `e1071` export kurtosis() and skewness(); whichever
# package is attached last wins. Qualify the calls so the e1071 versions
# (excess kurtosis) are used regardless of library() order.
# NOTE(review): e1071 documents type = 2 as the SAS/SPSS formula, while the
# default (type = 3) is what runs here - confirm whether an exact SPSS match
# is required before changing it, since the recorded output below would shift.
e1071::kurtosis(CutenessData$cuteness) # excess kurtosis
## [1] 0.09250901
e1071::skewness(CutenessData$cuteness)
## [1] -0.5656087

Outliers

Histogram

# Histograms used to eyeball outliers in each variable, each with a dashed
# vertical line at the variable's mean.
# bins = 30 spells out ggplot2's default bin count, so the plots are unchanged
# but the "pick better value with `binwidth`" message is no longer emitted.

# Outcome variable: cuteness
hist1 <- ggplot(CutenessData, aes(x = cuteness)) +
  geom_histogram(bins = 30, fill = "purple", color = "black") +
  geom_vline(
    aes(xintercept = mean(cuteness)),
    color = "#000000",
    linetype = "dashed"
  )
hist1 # display the plot; assigning it alone draws nothing

# Predictor variable: catlove
hist2 <- ggplot(CutenessData, aes(x = catlove)) +
  geom_histogram(bins = 30, fill = "lightgreen", color = "black") +
  geom_vline(
    aes(xintercept = mean(catlove)),
    color = "#000000",
    linetype = "dashed"
  )
hist2 # display the plot; assigning it alone draws nothing

Z-Scores

# Standardise cuteness: subtract the mean and divide by the standard deviation.
zscoreCuteness <- with(
  CutenessData,
  (cuteness - mean(cuteness)) / sd(cuteness)
)
# Frequency of each distinct z-score, so extreme values can be read off the
# two ends of the table.
table(zscoreCuteness)
## zscoreCuteness
##  -3.34215861173202  -2.77707437237283  -2.49453225269323  -2.42389672277333 
##                  2                  6                  1                  2 
##  -2.07071907317384  -1.92944801333404  -1.71754142357434  -1.64690589365444 
##                  5                  1                  5                  5 
##  -1.36436377397485  -1.08182165429525  -1.01118612437535 -0.799279534615656 
##                 33                  4                 14                  8 
## -0.658008474775858  -0.51673741493606 -0.304830825176362 -0.234195295256464 
##                 33                 11                 35                 14 
## 0.0483468244231329  0.330888944102729  0.401524474022628  0.472160003942527 
##                 70                 11                 26                  3 
##  0.613431063782325  0.754702123622124  0.895973183461922   1.10787977322162 
##                 10                 42                 11                 27 
##   1.17851530314152   1.46105742282111 
##                  5                 47

Box Plots

# Box plot of the outcome variable (cuteness); built, then displayed.
boxplot1 <- ggplot(data = CutenessData, mapping = aes(y = cuteness)) +
  geom_boxplot()
boxplot1

# Box plot of the predictor variable (catlove); built, then displayed.
boxplot2 <- ggplot(data = CutenessData, mapping = aes(y = catlove)) +
  geom_boxplot()
boxplot2

Linearity

Scatterplot

# Scatterplot of cuteness (y) against catlove (x) for the linearity check.
scatterplot1 <- ggplot(
  data = CutenessData,
  mapping = aes(x = catlove, y = cuteness)
) +
  geom_point(color = "black", size = 2)
# Convert the static ggplot into an interactive plotly widget and show it.
plotly::ggplotly(p = scatterplot1)

Correlation

# Pearson correlation between cuteness and catlove, with its significance
# test and 95% confidence interval.
cor.test(
  x = CutenessData$cuteness,
  y = CutenessData$catlove,
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  CutenessData$cuteness and CutenessData$catlove
## t = 4.2118, df = 429, p-value = 3.089e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1068249 0.2882998
## sample estimates:
##       cor 
## 0.1992702

Regression

# Simple linear regression: cuteness predicted from catlove.
lmcuteness <- lm(formula = cuteness ~ catlove, data = CutenessData)

# Model summary: coefficients with their tests, R-squared and the overall
# F test.
summary(lmcuteness)
## 
## Call:
## lm(formula = cuteness ~ catlove, data = CutenessData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.51745 -0.38160  0.03449  0.53449  1.26240 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.50970    0.11334  22.144  < 2e-16 ***
## catlove      0.15194    0.03607   4.212 3.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6945 on 429 degrees of freedom
## Multiple R-squared:  0.03971,    Adjusted R-squared:  0.03747 
## F-statistic: 17.74 on 1 and 429 DF,  p-value: 3.089e-05

# ANOVA table for the model: sums of squares and mean squares for the
# predictor and the residuals.
anova(lmcuteness)
## Analysis of Variance Table
## 
## Response: cuteness
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## catlove     1   8.556  8.5555  17.739 3.089e-05 ***
## Residuals 429 206.902  0.4823                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1